##### Define functions
##### Create 'not in' operator
"%!in%" <- function(x,table) match(x,table, nomatch = 0) == 0

##### Prepare object to write into a file
prepare2write <- function (x) {
  
  x2write <- cbind(rownames(x), x)
  colnames(x2write) <- c("Gene",colnames(x))
  return(x2write)
}

##### Combine sample expression profile with reference datasets. This function outputs a vector with first element containing the merged data and second element containing merged targets info
combineDataets <- function(sample_name, sample_counts, ref_dataset) {
  
  ##### Read file with reference datasets information
  DatasetInput=read.table(ref_dataset, sep="\t", as.is=TRUE, header=TRUE, row.names=1)
  
  ##### Extract info about target file for the first dataset
  fileInfo = strsplit(DatasetInput[,"Target_file"], split='/', fixed=TRUE)
  targetFile <- read.table(DatasetInput[1,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]
  rownames(targetFile) <- targetFile[,"Sample_name"]
  targetFile <- cbind(targetFile[,2:4],rownames(DatasetInput[1,]))
  colnames(targetFile)[ncol(targetFile)] <- "Dataset"
  
  if ( nrow(DatasetInput) > 1 ) {
    for ( i in 2:nrow(DatasetInput) ) {
        
      ##### Create a temporary object to store info from the remaining target files
      targetFileTmp <- read.table(DatasetInput[i,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]
      rownames(targetFileTmp) <- targetFileTmp[,"Sample_name"]
      targetFileTmp <- cbind(targetFileTmp[,2:4],rownames(DatasetInput[i,]))
      colnames(targetFileTmp)[ncol(targetFileTmp)] <- "Dataset"
        
      targetFile <- rbind(targetFile, targetFileTmp)
    }
  }  
  
  ##### Add sample info
  sampleTargetFile <- data.frame(sample_counts, sample_name, NA, sample_name)
  names(sampleTargetFile) <- names(targetFile)
  rownames(sampleTargetFile) <- sample_name
  targetFile <- rbind( targetFile, sampleTargetFile )
  
  ##### Make syntactically valid names
  rownames(targetFile) <- make.names(rownames(targetFile))
  
  ##### Read sample read count file and combine it with reference datasets
  datasets.comb=read.table(sample_counts, sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
  names(datasets.comb) <- c("", sample_name)
      
  ##### list genes present in the read count file
  gene_list <- as.vector(datasets.comb[,1])
      
  ##### Loop through the expression data from different datasets and merge them into one matrix
  for ( data_matrix in DatasetInput[ , "Expression_matrix" ] ) {
    
    ##### Add data from the reference datasets
    dataset <- as.data.frame( read.table(data_matrix, header=TRUE, sep="\t", row.names=NULL) )
      
    ##### list genes present in individal files
    gene_list <- c( gene_list, as.vector(dataset[,1]) )
    
    ##### Merge the expression datasets and make sure that the genes order is the same
    datasets.comb <- merge( datasets.comb, dataset, by=1, all = FALSE, sort= TRUE)
      
    ##### Remove per-sample data for merged samples to free some memory
    rm(dataset)
  }
  
  ##### Use gene IDs as rownames
  rownames(datasets.comb) <- datasets.comb[,1]
  datasets.comb <- datasets.comb[, -1]
  
  ##### Make syntactically valid names
  colnames(datasets.comb) <- make.names(colnames(datasets.comb))
  
  ##### Make sure that the target file contains info only about samples present in the data matrix
  targetFile <- targetFile[ rownames(targetFile) %in% colnames(datasets.comb),  ]
  
  ##### Make sure that the samples order in the data matrix is the same as in the target file 
  datasets.comb <- datasets.comb[ , rownames(targetFile) ]
  
  ##### Identify genes that were not present across all per-sampel files and were ommited in the merged matrix
  gene_list <- unique(gene_list)
  gene_list.missing <- gene_list[ gene_list %!in% rownames(datasets.comb) ]
  
  ##### Write list of missing genes into a file
  if ( length(gene_list.missing) > 0 ) {
    write.table(prepare2write(gene_list.missing), file = paste0(params$report_dir, "/", sample_name,".missing_genes.txt"), sep="\t", quote=FALSE, row.names=TRUE, append = FALSE )
  }
  
    return( list(datasets.comb, targetFile) )
}


##### Assign colours to different groups
getTargetsColours <- function(targets) {
  
##### Predefined selection of colours for groups
targets.colours <- c("red","blue","green","darkgoldenrod","darkred","deepskyblue", "coral", "cornflowerblue", "chartreuse4", "bisque4", "chocolate3", "cadetblue3", "darkslategrey", "lightgoldenrod4", "mediumpurple4", "orangered3","indianred1","blueviolet","darkolivegreen4","darkgoldenrod4","firebrick3","deepskyblue4", "coral3", "dodgerblue1", "chartreuse3", "bisque3", "chocolate4", "cadetblue", "darkslategray4", "lightgoldenrod3", "mediumpurple3", "orangered1")
  
  f.targets <- factor(targets)
  vec.targets <- targets.colours[1:length(levels(f.targets))]
  targets.colour <- rep(0,length(f.targets))
  for(i in 1:length(f.targets))
    targets.colour[i] <- vec.targets[ f.targets[i]==levels(f.targets)]
  
  return( list(vec.targets, targets.colour) )
}

##### Assign colours to different datasets
getDatasetsColours <- function(datasets) {
  
  ##### Predefined selection of colours for datasets
  datasets.colours <- c("dodgerblue","firebrick","lightslategrey","darkseagreen","orange","darkcyan","bisque", "coral2", "cadetblue3","red","blue","green")
  
  f.datasets <- factor(datasets)
  vec.datasets <- datasets.colours[1:length(levels(f.datasets))]
  datasets.colour <- rep(0,length(f.datasets))
  for(i in 1:length(f.datasets))
    datasets.colour[i] <- vec.datasets[ f.datasets[i]==levels(f.datasets)]
  
  return( list(vec.datasets, datasets.colour) )
}

##### Perform PCA. This function outputs a list with dataframe and samples colouring info ready for plotting
pca <- function(data, targets) {

  ##### Keep only genes with variance > 0 across all samples
  rsd <- apply(data,1,sd)
  data.subset <- data[rsd>0,]
  
  ##### Perform PCA
  data.subset_pca <- prcomp(t(data.subset), scale=FALSE)
  
  ##### Get variance importance for all principal components
  importance_pca <- summary(data.subset_pca)$importance[2,]
  importance_pca <- paste(round(100*importance_pca, 2), "%", sep="")
  names(importance_pca) <- names(summary(data.subset_pca)$importance[2,])
    
  ##### Prepare data frame
  data.subset_pca.df <- data.frame(targets$Target, targets$Dataset, data.subset_pca$x[,"PC1"], data.subset_pca$x[,"PC2"], data.subset_pca$x[,"PC3"])
  colnames(data.subset_pca.df) <- c("Target", "Dataset", "PC1", "PC2", "PC3")
  
  ##### Assigne colours to targets and datasets
  targets.colour <- getTargetsColours(target$Target)
  datasets.colour <- getDatasetsColours(target$Dataset)
  
  ##### Create a list with dataframe and samples colouring info
  pca.list <- list(data.subset_pca.df, importance_pca, targets.colour, datasets.colour)
  names(pca.list) <- c("pca.df", "importance_pca", "targets", "datasets")
  
  return( pca.list )
}


##### Customise the "overview" function from made4 package (present hierarchical clustering using ClassDiscovery package)
overview <- function (dataset, labels = NULL, title = "", classvec = NULL, 
    hc = TRUE, boxplot = TRUE, hist = TRUE, returnTree = FALSE) 
{
    cols = NULL
    if (sum(boxplot, hist, hc) == 3) 
        layout(matrix(c(1, 1, 2, 3), 2, 2, byrow = TRUE))
    if (sum(boxplot, hist, hc) == 2) 
        layout(matrix(c(1, 1, 2, 2), 2, 2, byrow = TRUE))
    classvec = as.factor(classvec)
    if (is.null(labels)) 
        labels = colnames(dataset)
    if (!is.null(labels)) 
        labels = as.character(labels)
    if (!is.null(classvec)) {
        cols = getcol(as.numeric(classvec))
        layout(matrix(c(1, 1, 2, 2, 3, 4), 3, 2, byrow = TRUE), 
            heights = c(1.5, 0.1, 0.5))
    }
    distEisen <- function(x, use = "pairwise.complete.obs") {
        co.x <- cor(x, use = use)
        dist.co.x <- 1 - co.x
        return(as.dist(dist.co.x))
    }
    colhc <- function(hc, classvec) {
        margins = par()$mar
        par(mar = c(0.5, margins[1], 0, margins[2]))
        nc = length(as.character(classvec))
        colInd = hc$order
        image(cbind(1:nc), col = cols[colInd], axes = FALSE)
        par(mar = margins)
    }
    if (hc) {
        if (!inherits(dataset, "matrix")) {
            if (!inherits(dataset, "AffyBatch")) {
                dataset <- array2ade4(dataset, trans = FALSE)
            }
            if (inherits(dataset, "AffyBatch")) {
                dataset = exprs(dataset)
            }
        }
        hc = hclust(distEisen(dataset), method = "ave")
        #plot(hc, hang = -1, labels = labels, main = paste("", 
        #    title, sep = " "), sub = "", xlab = "")
        #if (!missing(classvec)) 
        #    colhc(hc, classvec)
        
        ##### Use ClassDiscovery package to generate dendrogram
        plotColoredClusters(hc, labs=labels, cols=getTargetsColours(classvec)[[2]], main = title)
        ##### Add a legend
        legend("topright", legend=levels(classvec), col=getTargetsColours(classvec)[[1]], lty=1, cex=1.4, bg="white", box.col="white")
    }
    if (boxplot) {
        if (!inherits(dataset, "AffyBatch")) {
            dataset <- array2ade4(dataset, trans = FALSE)
        }
        boxplot(dataset, main = paste("boxplot", title, sep = " "), 
            names = labels, par(las = 2), col = cols)
    }
    if (hist) {
        if (!inherits(dataset, "AffyBatch")) {
            dataset <- array2ade4(dataset, trans = FALSE)
            dataset = as.matrix(dataset)
        }
        hist(dataset, xlab = "", main = paste("Histogram", title, 
            sep = " "))
    }
    if (returnTree) 
        return(hc)
}
##### Load libraries
suppressMessages(library(edgeR))
suppressMessages(library(preprocessCore))
suppressMessages(library(rapportools))
suppressMessages(library(plotly))
suppressMessages(library(edgeR))
suppressMessages(library(ClassDiscovery))
suppressMessages(library(made4))
suppressMessages(library(ade4))
##### Load reference datasets
##### Create a list with reference datasets
ref_datasets <- c("mix", "core_gastrointestinal", "developmental_gastrointestinal", "pancreas", "bailey", "collisson", "moffitt")
ref_datasets.list <- vector("list", length(ref_datasets))
names(ref_datasets.list) <- ref_datasets

##### Create a list with subtype-associated genes
ref_genes <- c("bailey", "collisson", "moffitt")
ref_genes.list <- vector("list", length(ref_genes))
names(ref_genes.list) <- ref_genes

##### Read in reference datasets and merge them with sample data. This part outputs a vector with first element containing the merged data and second element containing merged targets info
ref_datasets.list[["mix"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_mix)
ref_datasets.list[["core_gastrointestinal"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_core_gastrointestinal)
ref_datasets.list[["developmental_gastrointestinal"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_developmental_gastrointestinal)
ref_datasets.list[["pancreas"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_pancreas)
ref_datasets.list[["bailey"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_bailey)
ref_datasets.list[["collisson"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_collisson)
ref_datasets.list[["moffitt"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_moffitt)

##### Read in subtype-associated genes
ref_genes.list[["collisson"]] <- read.table(params$genes_collisson, sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
ref_genes.list[["bailey"]] <- read.table(params$genes_bailey, sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
ref_genes.list[["moffitt"]] <- read.table(params$genes_moffitt, sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
##### Data transformation and filtering
##### For differential expression and related analyses, gene expression is rarely considered at the level of raw counts since libraries sequenced at a greater depth will result in higher counts. Rather, it is common practice to transform raw counts onto a scale that accounts for such library size differences. Here we convert the read count data into log2-counts per million (***log-CPM***) using function from *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html)* package. Genes with very low counts across all libraries provide little evidence for differential expression. In the biological point of view, a gene must be expressed at some minimal level before it is likely to be translated into a protein or to be biologically important. In addition, the pronounced discretenes of these counts interferes with some of the statistical approximations that are used later in the pipeline. These genes should be filtered out prior to further analysis.

##### Loop through combined datasets
for ( ref in names(ref_datasets.list) ) {
  
  counts <- ref_datasets.list[[ref]][[1]]
  target <- ref_datasets.list[[ref]][[2]]
  
  ##### Create EdgeR DGEList object
  y <- DGEList(counts=counts,  group=target$Target)
  
  ##### Add datasets name for each sample
  y$samples$dataset <- target$Dataset
  
  ##### Filtering to remove low expressed genes. Users should filter with CPM rather than filtering on the counts directly, as the latter does not account for differences in library sizes between samples. Here we keep only genes that have CPM of 1
  keep <- rowSums(cpm(y)>1) >= ncol(counts)/10
  y.filtered <- y[keep, , keep.lib.sizes=FALSE]
  
  ref_datasets.list[[ref]][[3]] <- y.filtered
}

# cat("The CPM of 1 (cut-off for removing low expressed genes) corresponds to", round(min(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the lowest sequencing depth, and", round(max(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the greatest sequencing depth\n")

cat(nrow(y.filtered$counts), "genes remained after filtering low expressed genes, out of the total", nrow(counts), "input genes\n\n")
18045 genes remained after filtering low expressed genes, out of the total 54516 input genes
##### Data normalisation
##### During the sample preparation or sequencing process, external factors that are not of biological interest can affect the expression of individual samples. For example, samples processed in the first batch of an experiment can have higher expression overall when compared to samples processed in a second batch. It is assumed that all samples should have a similar range and distribution of expression values. Normalisation for sample-specific effectss is required to ensure that the expression distributions of each sample are similar across the entire experiment. Normalisation by the method of *[trimmed mean of M-values](https://www.ncbi.nlm.nih.gov/pubmed/20196867) (TMM)* is performed using the *calcNormFactors* function in *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html)*. The normalisation factors calculated here are used as a scaling factor for the library sizes. TMM is the recommended for most RNA-Seq data where the majority (more than half) of the genes are believed not differentially expressed between any pair of the samples.

##### Adjust for RNA composition effect. Calculate scaling factors for the library sizes with calcNormFactors function using trimmed mean of M-values (TMM) between each pair of samples. Note, that the raw read counts are used to calculate the normalisation factors

##### Loop through combined datasets
for ( ref in names(ref_datasets.list) ) {
  
  y.filtered <- ref_datasets.list[[ref]][[3]]
  
  y.filtered.norm <- calcNormFactors(y.filtered, method = "TMM")
  
  ##### Transformations from the raw-scale to CPM
  y.filtered.norm.cpm <- cpm(y.filtered.norm, normalized.lib.sizes=TRUE, log=TRUE, prior.count=0.25)
  
  ref_datasets.list[[ref]][[3]] <- y.filtered.norm.cpm
}
##### Principal component analysis (PCA)
##### Loop through combined datasets and perform PCA
for ( ref in names(ref_datasets.list) ) {
  
  target <- ref_datasets.list[[ref]][[2]]
  data <- ref_datasets.list[[ref]][[3]]
  
  ref_datasets.list[[ref]][[4]] <- pca(data, target)
}

Comparison across tumour types

Principal component analysis (PCA), hierarchical clustering and between-group analysis (BGA) were performed to present the data from investigated sample in the context of patients with various tumour types derived from TCGA cohort. The sample expression profile is projected against the four patient cohorts representing (1) unrelated tumour types (distant tumours), (2) core gastrointestinal tumours, (3) developmental gastrointestinal tumours and (4) pancreas-related lesions/tissues.

Distant

PCA reduces the dimensionality of data while retaining most of the variation in the dataset, making it possible to visually assess similarities and differences between the investigated sample and the various patient cohorts. The expression profile of investigated sample is projected against unrelated tumour types, including (1) acute myeloid leukaemia, (2) brain lower grade glioma, (3) breast invasive carcinoma, (4) glioblastoma multiforme, (5) lung Squamous cell carcinoma and (6) pancreatic ductal adenocarcinoma (PDAC).

suppressMessages(library(plotly))
##### Generate PCA plots for distant tumour types
ref <- "mix"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

##### Generate PCA 2-D plot (colour datasets)
#p2 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Dataset, text=paste(target$Dataset, rownames(pca.df), sep=": "), colors = datasets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
#  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
#p2

##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Hierarchical clustering with reference samples coloured according to corresponding tumour types.

##### Generate dendrogram with samples coloured according to distant tumour types
data <- ref_datasets.list[[ref]][[3]]

overview(data, classvec=as.factor(target$Target), labels=colnames(data), title="", boxplot=FALSE, hist=FALSE)

BGA is a supervised classification method (Culhane et al., (2002)), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks.

Top: projection of samples (coloured by groups), with samples annotation (left) and with group annotation (right), showing their separation on the first two discriminating BGA axes. Samples with a strong association are projected in the same direction from the origin. The greater the distance from the origin the stronger the association.

Bottom: graph of positions of genes on the same BGA axis (left) and plot of the eigenvalues (right). Genes at the ends of the axis are most discriminating for that group.

##### Perform BGA and plot the results
# Between-group analysis (BGA) is a supervised classification method (Culhane et al., (2002), https://www.ncbi.nlm.nih.gov/pubmed/12490444), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks. The basis of BGA is to ordinate the groups rather than the individual samples. BGA is implemented in the MADE4 package (Culhane et al., (2005), https://www.ncbi.nlm.nih.gov/pubmed/15797915). It also uses functions from ADE4 package (https://bioconductor.org/packages/release/bioc/html/made4.html and http://pbil.univ-lyon1.fr/ade4/home.php?lang=eng).
bga.res<-bga(data, type="coa", classvec=as.factor(target$Target))

##### Plot BGA results
plot(bga.res,  arraycol=targets.colour[[1]])

Gastrointestinal (core)

PCA reduces the dimensionality of data while retaining most of the variation in the dataset, making it possible to visually assess similarities and differences between the investigated sample and the various patient cohorts. The expression profile of investigated sample is projected against core gastrointestinal tumours, including (1) esophageal carcinoma, (2) stomach adenocarcinoma, (3) colon adenocarcinoma and (4) rectum adenocarcinoma, (as defined in the TCGA PanCan paper), as well as (5) pancreatic ductal adenocarcinoma (PDAC).

suppressMessages(library(plotly))
##### Generate PCA plots for core gastrointestinal tumours
ref <- "core_gastrointestinal"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Hierarchical clustering with reference samples coloured according to core gastrointestinal tumours.

##### Generate dendrogram with samples coloured according to core gastrointestinal tumours
data <- ref_datasets.list[[ref]][[3]]

overview(data, classvec=as.factor(target$Target), labels=colnames(data), title="", boxplot=FALSE, hist=FALSE)

BGA is a supervised classification method (Culhane et al., (2002)), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks.

Top: projection of samples (coloured by groups), with samples annotation (left) and with group annotation (right), showing their separation on the first two discriminating BGA axes. Samples with a strong association are projected in the same direction from the origin. The greater the distance from the origin the stronger the association.

Bottom: graph of positions of genes on the same BGA axis (left) and plot of the eigenvalues (right). Genes at the ends of the axis are most discriminating for that group.

##### Perform BGA and plot the results
# Between-group analysis (BGA) is a supervised classification method (Culhane et al., (2002), https://www.ncbi.nlm.nih.gov/pubmed/12490444), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks. The basis of BGA is to ordinate the groups rather than the individual samples. BGA is implemented in the MADE4 package (Culhane et al., (2005), https://www.ncbi.nlm.nih.gov/pubmed/15797915). It also uses functions from ADE4 package (https://bioconductor.org/packages/release/bioc/html/made4.html and http://pbil.univ-lyon1.fr/ade4/home.php?lang=eng).
bga.res<-bga(data, type="coa", classvec=as.factor(target$Target))

##### Plot BGA results
plot(bga.res,  arraycol=targets.colour[[1]])

Gastrointestinal (developmental)

PCA reduces the dimensionality of data while retaining most of the variation in the dataset, making it possible to visually assess similarities and differences between the investigated sample and the various patient cohorts. The expression profile of investigated sample is projected against developmental gastrointestinal tumours, including (1) liver hepatocellular carcinoma, (2) cholangiocarcinoma and (3) pancreatic ductal adenocarcinoma (PDAC) (as defined in the TCGA PanCan paper).

suppressMessages(library(plotly))
##### Generate PCA plots for developmental gastrointestinal tumours
ref <- "developmental_gastrointestinal"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Hierarchical clustering with reference samples coloured according to developmental gastrointestinal tumours.

##### Generate dendrogram with samples coloured according to developmental gastrointestinal tumours
data <- ref_datasets.list[[ref]][[3]]

overview(data, classvec=as.factor(target$Target), labels=colnames(data), title="", boxplot=FALSE, hist=FALSE)

BGA is a supervised classification method (Culhane et al., (2002)), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks.

Top: projection of samples (coloured by groups), with samples annotation (left) and with group annotation (right), showing their separation on the first two discriminating BGA axes. Samples with a strong association are projected in the same direction from the origin. The greater the distance from the origin the stronger the association.

Bottom: graph of positions of genes on the same BGA axis (left) and plot of the eigenvalues (right). Genes at the ends of the axis are most discriminating for that group.

##### Perform BGA and plot the results
# Between-group analysis (BGA) is a supervised classification method (Culhane et al., (2002), https://www.ncbi.nlm.nih.gov/pubmed/12490444), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks. The basis of BGA is to ordinate the groups rather than the individual samples. BGA is implemented in the MADE4 package (Culhane et al., (2005), https://www.ncbi.nlm.nih.gov/pubmed/15797915). It also uses functions from ADE4 package (https://bioconductor.org/packages/release/bioc/html/made4.html and http://pbil.univ-lyon1.fr/ade4/home.php?lang=eng).
bga.res<-bga(data, type="coa", classvec=as.factor(target$Target))

##### Plot BGA results
plot(bga.res,  arraycol=targets.colour[[1]])

Molecular classification

Principal component analysis (PCA), hierarchical clustering and between-group analysis (BGA) were performed to project investigated sample in the context of TCGA PAAD samples classified based mRNA subtypes reported by Bailey et al. (2016), Moffitt et al. (2015) and Collisson et al. (2011). The molecular classification aids patient assignment into less heterogeneous and more appropriate group regarding the metastatic risk and the therapeutic response, with the consequences of better predicting evolution and better orienting the treatment. A recent report by Birnbaum DJ1 et al. (2018) reviews the association between PDAC molecular subtypes and drugs sensitivity.

Bailey

PCA reduces the dimensionality of data while retaining most of the variation in the dataset, making it possible to visually assess similarities and differences between the investigated sample and the various patient cohorts.

suppressMessages(library(plotly))
##### Generate PCA plots for TCGA PAAD samples stratified according to Bailey classification
ref <- "bailey"

##### PCA using subtype-specific genes only
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
data <- data[ rownames(data) %in% unlist(ref_genes.list[[ref]]), ]
ref_datasets.list[[ref]][[5]] <- pca(data, target)

pca.df <- ref_datasets.list[[ref]][[5]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[5]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[5]]$targets
datasets.colour <- ref_datasets.list[[ref]][[5]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))


##### Generate PCA 3-D plot  (colour targets)
#p2 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, z = ~PC3, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter3d', mode = "markers", marker = list(size=8, opacity = 0.7, symbol="circle"), width = 800, height = 800) %>%
#  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), zaxis = list(title = paste( "PC3", " (",importance_pca["PC3"],")",sep="")) , margin = list(l=50, r=50, b=50, t=10, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
#p2

##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)
#htmlwidgets::saveWidget(as_widget(p2), paste0(normalizePath(params$report_dir), "/", "mix_PCA_3d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Hierarchical clustering with reference samples coloured according to Bailey molecular subtypes.

##### Generate dendrogram with samples coloured according to Bailey classification
overview(data, classvec=as.factor(target$Target), labels=colnames(data), title="", boxplot=FALSE, hist=FALSE)

BGA is a supervised classification method (Culhane et al., (2002)), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks.

Top: projection of samples (coloured by groups), with samples annotation (left) and with group annotation (right), showing their separation on the first two discriminating BGA axes. Samples with a strong association are projected in the same direction from the origin. The greater the distance from the origin the stronger the association.

Bottom: graph of positions of genes on the same BGA axis (left) and plot of the eigenvalues (right). Genes at the ends of the axis are most discriminating for that group.

##### Perform BGA and plot the results
# Between-group analysis (BGA) is a supervised classification method (Culhane et al., (2002), https://www.ncbi.nlm.nih.gov/pubmed/12490444), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks. The basis of BGA is to ordinate the groups rather than the individual samples. BGA is implemented in the MADE4 package (Culhane et al., (2005), https://www.ncbi.nlm.nih.gov/pubmed/15797915). It also uses functions from ADE4 package (https://bioconductor.org/packages/release/bioc/html/made4.html and http://pbil.univ-lyon1.fr/ade4/home.php?lang=eng).
bga.res<-bga(data, type="coa", classvec=as.factor(target$Target))

##### Plot BGA results
plot(bga.res,  arraycol=targets.colour[[1]])


This classification includes 4 tumour subtypes associated with different molecular pathways:

  1. Squamous - associated with a high rate of TP53 and KDM6A mutations and genes involved in inflammation, hypoxia response, metabolic reprogramming, TGFB signaling, MYC pathway activation, autophagy and upregulated expression of TP63deltaN and its target genes. It has the worst survival compared to the other three subtypes. Demonstrates resistance to gemcitabine and docetaxel and is sensitive to oxaliplatine and 5-FU;
  2. Pancreatic progenitor - expressed genes involved in early pancreatic development (PDX1, MNX1, HNF4G, HNF4A, HNF1B, HNF1A, FOXA2, FOXA3, and HES1). Demonstrates resistance to docetaxel and SN-38;
  3. ADEX (aberrantly-differentiated endocrine exocrine) - expressed genes involved in later stages of endocrine and exocrine pancreas development and differentiation (NR5A2, MIST1, RBPJL, INS, NEUROD1, and MAFA);
  4. Immunogenic - characterized by immune infiltration associated with B and T-cell signaling pathway, antigen presentation, CD4+ T-cell, CD8+ T-cell, and Toll-like receptor signaling pathway. Potentially can be targeted using immune modulators

Collisson

PCA reduces the dimensionality of data while retaining most of the variation in the dataset, making it possible to visually assess similarities and differences between the investigated sample and the various patient cohorts.

suppressMessages(library(plotly))
##### Generate PCA plots for TCGA PAAD samples stratified according to Collisson classification
ref <- "collisson"

##### PCA using subtype-specific genes only
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
data <- data[ rownames(data) %in% unlist(ref_genes.list[[ref]]), ]
ref_datasets.list[[ref]][[5]] <- pca(data, target)

pca.df <- ref_datasets.list[[ref]][[5]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[5]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[5]]$targets
datasets.colour <- ref_datasets.list[[ref]][[5]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

##### Generate PCA 3-D plot  (colour targets)
#p2 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, z = ~PC3, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter3d', mode = "markers", marker = list(size=8, opacity = 0.7, symbol="circle"), width = 800, height = 800) %>%
#  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), zaxis = list(title = paste( "PC3", " (",importance_pca["PC3"],")",sep="")) , margin = list(l=50, r=50, b=50, t=10, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
#p2

##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)
#htmlwidgets::saveWidget(as_widget(p2), paste0(normalizePath(params$report_dir), "/", "mix_PCA_3d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Hierarchical clustering with reference samples coloured according to Collisson molecular subtypes.

##### Generate dendrogram with samples coloured according to Collisson classification
overview(data, classvec=as.factor(target$Target), labels=colnames(data), title="", boxplot=FALSE, hist=FALSE)

BGA is a supervised classification method (Culhane et al., (2002)), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks.

Top: projection of samples (coloured by groups), with samples annotation (left) and with group annotation (right), showing their separation on the first two discriminating BGA axes. Samples with a strong association are projected in the same direction from the origin. The greater the distance from the origin the stronger the association.

Bottom: graph of positions of genes on the same BGA axis (left) and plot of the eigenvalues (right). Genes at the ends of the axis are most discriminating for that group.

##### Perform BGA and plot the results
# Between-group analysis (BGA) is a supervised classification method (Culhane et al., (2002), https://www.ncbi.nlm.nih.gov/pubmed/12490444), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks. The basis of BGA is to ordinate the groups rather than the individual samples. BGA is implemented in the MADE4 package (Culhane et al., (2005), https://www.ncbi.nlm.nih.gov/pubmed/15797915). It also uses functions from ADE4 package (https://bioconductor.org/packages/release/bioc/html/made4.html and http://pbil.univ-lyon1.fr/ade4/home.php?lang=eng).
bga.res<-bga(data, type="coa", classvec=as.factor(target$Target))

##### Plot BGA results
plot(bga.res,  arraycol=targets.colour[[1]])


This classification includes 3 tumour subtypes with different clinical outcomes and therapeutic responses:

  1. Classical - overexpression of genes associated with adhesion and epithelium, and of GATA6 involved in pancreatic development. It has better survival compared to the other subtypes. Demonstrates resistance to docetaxel and is sensitive to erlotinib;
  2. Quasi-mesenchymal - overexpression of mesenchyme-associated genes (TWIST1, HK2, and CAV1). It is correlated with the worst survival of all the three subtypes. Demonstrates resistance to gemcitabine and is sensitive to oxaliplatine and 5-FU;
  3. Exocrine-like - high expression of genes related to digestive enzymes. Demonstrates resistance to SN-38

Moffitt

PCA reduces the dimensionality of data while retaining most of the variation in the dataset, making it possible to visually assess similarities and differences between the investigated sample and the various patient cohorts.

suppressMessages(library(plotly))
##### Generate PCA plots for TCGA PAAD samples stratified according to Moffitt classification
ref <- "moffitt"

##### PCA using subtype-specific genes only
target <- ref_datasets.list[[ref]][[2]]
data <- ref_datasets.list[[ref]][[3]]
data <- data[ rownames(data) %in% unlist(ref_genes.list[[ref]]), ]
ref_datasets.list[[ref]][[5]] <- pca(data, target)

pca.df <- ref_datasets.list[[ref]][[5]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[5]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[5]]$targets
datasets.colour <- ref_datasets.list[[ref]][[5]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

##### Generate PCA 3-D plot  (colour targets)
#p2 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, z = ~PC3, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter3d', mode = "markers", marker = list(size=8, opacity = 0.7, symbol="circle"), width = 800, height = 800) %>%
#  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), zaxis = list(title = paste( "PC3", " (",importance_pca["PC3"],")",sep="")) , margin = list(l=50, r=50, b=50, t=10, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
#p2

##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)
#htmlwidgets::saveWidget(as_widget(p2), paste0(normalizePath(params$report_dir), "/", "mix_PCA_3d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Hierarchical clustering with reference samples coloured according to Moffitt molecular subtypes.

##### Generate dendrogram with samples coloured according to Moffitt classification
overview(data, classvec=as.factor(target$Target), labels=colnames(data), title="", boxplot=FALSE, hist=FALSE)

BGA is a supervised classification method (Culhane et al., (2002)), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks.

Top: projection of samples (coloured by groups), with samples annotation (left) and with group annotation (right), showing their separation on the first two discriminating BGA axes. Samples with a strong association are projected in the same direction from the origin. The greater the distance from the origin the stronger the association.

Bottom: graph of positions of genes on the same BGA axis (left) and plot of the eigenvalues (right). Genes at the ends of the axis are most discriminating for that group.

##### Perform BGA and plot the results
# Between-group analysis (BGA) is a supervised classification method (Culhane et al., (2002), https://www.ncbi.nlm.nih.gov/pubmed/12490444), which performs comparably to a range of supervised classification methods, including support vector machines and artificial neural networks. The basis of BGA is to ordinate the groups rather than the individual samples. BGA is implemented in the MADE4 package (Culhane et al., (2005), https://www.ncbi.nlm.nih.gov/pubmed/15797915). It also uses functions from ADE4 package (https://bioconductor.org/packages/release/bioc/html/made4.html and http://pbil.univ-lyon1.fr/ade4/home.php?lang=eng).
bga.res<-bga(data, type="coa", classvec=as.factor(target$Target))

##### Plot BGA results
plot(bga.res,  arraycol=targets.colour[[1]])


This classification includes 2 stromal-associated (normal and activated stroma) and 2 tumour-associated subtypes. Here the malignant epithelial-associated subtypes are presented:

  1. Basal-like - has worst prognosis and is molecularly similar to basal tumors in bladder and breast cancers. It is sensitive to oxaliplatine and 5-FU;
  2. Classical - has better prognosis. Demonstrates resistance to docetaxel and SN-38 and is sensitive to erlotinib

Parameters

for ( i in 1:length(params) ) {

  cat(paste("Parameter: ", names(params)[i], "\nValue: ", paste(unlist(params[i]), collapse = ","), "\n\n", sep=""))
}
Parameter: datasets_mix
Value: ../data/Datasets_list_mix_group.txt

Parameter: datasets_core_gastrointestinal
Value: ../data/Datasets_list_core_gastrointestinal_group.txt

Parameter: datasets_developmental_gastrointestinal
Value: ../data/Datasets_list_developmental_gastrointestinal_group.txt

Parameter: datasets_pancreas
Value: ../data/Datasets_list_pancreas.txt

Parameter: datasets_bailey
Value: ../data/Datasets_list_bailey.txt

Parameter: datasets_collisson
Value: ../data/Datasets_list_collisson.txt

Parameter: datasets_moffitt
Value: ../data/Datasets_list_moffitt.txt

Parameter: genes_bailey
Value: ../data/Bailey_genes_ensembl.txt

Parameter: genes_collisson
Value: ../data/Collisson_genes_ensembl.txt

Parameter: genes_moffitt
Value: ../data/Moffitt_genes_ensembl.txt

Parameter: report_dir
Value: /Users/jmarzec/Documents/GitHub/UMCCR/RNAseq-Analysis-Report/reports

Parameter: sample_name
Value: CCR170012_MH17T001P013

Parameter: count_file
Value: ../data/CCR170012_MH17T001P013-ready.counts

Addendum

devtools::session_info()
## Session info -------------------------------------------------------------
##  setting  value                       
##  version  R version 3.5.0 (2018-04-23)
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_AU.UTF-8                 
##  tz       Australia/Melbourne         
##  date     2018-09-25
## Packages -----------------------------------------------------------------
##  package        * version  date       source        
##  ade4           * 1.7-13   2018-08-31 CRAN (R 3.5.0)
##  assertthat       0.2.0    2017-04-11 CRAN (R 3.5.0)
##  backports        1.1.2    2017-12-13 CRAN (R 3.5.0)
##  base           * 3.5.0    2018-04-24 local         
##  bindr            0.1.1    2018-03-13 CRAN (R 3.5.0)
##  bindrcpp       * 0.2.2    2018-03-29 CRAN (R 3.5.0)
##  bitops           1.0-6    2013-08-17 CRAN (R 3.5.0)
##  caTools          1.17.1.1 2018-07-20 CRAN (R 3.5.0)
##  ClassDiscovery * 3.3.7    2017-07-12 CRAN (R 3.5.0)
##  cluster        * 2.0.7-1  2018-04-13 CRAN (R 3.5.0)
##  colorspace       1.3-2    2016-12-14 CRAN (R 3.5.0)
##  compiler         3.5.0    2018-04-24 local         
##  crayon           1.3.4    2017-09-16 CRAN (R 3.5.0)
##  crosstalk        1.0.0    2016-12-21 CRAN (R 3.5.0)
##  data.table       1.11.4   2018-05-27 CRAN (R 3.5.0)
##  datasets       * 3.5.0    2018-04-24 local         
##  devtools         1.13.6   2018-06-27 CRAN (R 3.5.0)
##  digest           0.6.16   2018-08-22 CRAN (R 3.5.0)
##  dplyr            0.7.6    2018-06-29 CRAN (R 3.5.1)
##  edgeR          * 3.22.3   2018-06-21 Bioconductor  
##  evaluate         0.11     2018-07-17 CRAN (R 3.5.0)
##  gdata            2.18.0   2017-06-06 CRAN (R 3.5.0)
##  getopt           1.20.2   2018-02-16 CRAN (R 3.5.0)
##  ggplot2        * 3.0.0    2018-07-03 CRAN (R 3.5.0)
##  glue             1.3.0    2018-07-17 CRAN (R 3.5.0)
##  gplots         * 3.0.1    2016-03-30 CRAN (R 3.5.0)
##  graphics       * 3.5.0    2018-04-24 local         
##  grDevices      * 3.5.0    2018-04-24 local         
##  grid             3.5.0    2018-04-24 local         
##  gtable           0.2.0    2016-02-26 CRAN (R 3.5.0)
##  gtools           3.8.1    2018-06-26 CRAN (R 3.5.0)
##  htmltools        0.3.6    2017-04-28 CRAN (R 3.5.0)
##  htmlwidgets      1.2      2018-04-19 CRAN (R 3.5.0)
##  httpuv           1.4.5    2018-07-19 CRAN (R 3.5.0)
##  httr             1.3.1    2017-08-20 CRAN (R 3.5.0)
##  jsonlite         1.5      2017-06-01 CRAN (R 3.5.0)
##  KernSmooth       2.23-15  2015-06-29 CRAN (R 3.5.0)
##  knitr            1.20     2018-02-20 CRAN (R 3.5.0)
##  later            0.7.4    2018-08-31 CRAN (R 3.5.0)
##  lattice          0.20-35  2017-03-25 CRAN (R 3.5.0)
##  lazyeval         0.2.1    2017-10-29 CRAN (R 3.5.0)
##  limma          * 3.36.3   2018-08-25 Bioconductor  
##  locfit           1.5-9.1  2013-04-20 CRAN (R 3.5.0)
##  made4          * 1.54.0   2018-05-01 Bioconductor  
##  magrittr         1.5      2014-11-22 CRAN (R 3.5.0)
##  MASS             7.3-50   2018-04-30 CRAN (R 3.5.0)
##  mclust           5.4.1    2018-06-27 CRAN (R 3.5.0)
##  memoise          1.1.0    2017-04-21 CRAN (R 3.5.0)
##  methods        * 3.5.0    2018-04-24 local         
##  mime             0.5      2016-07-07 CRAN (R 3.5.0)
##  munsell          0.5.0    2018-06-12 CRAN (R 3.5.0)
##  oompaBase      * 3.2.6    2018-05-17 CRAN (R 3.5.0)
##  oompaData        3.1.1    2017-07-10 CRAN (R 3.5.0)
##  optparse       * 1.6.0    2018-06-17 CRAN (R 3.5.0)
##  pander           0.6.2    2018-07-08 CRAN (R 3.5.0)
##  pillar           1.3.0    2018-07-14 CRAN (R 3.5.0)
##  pkgconfig        2.0.2    2018-08-16 CRAN (R 3.5.0)
##  plotly           4.8.0    2018-07-20 CRAN (R 3.5.0)
##  plyr             1.8.4    2016-06-08 CRAN (R 3.5.0)
##  preprocessCore * 1.42.0   2018-05-01 Bioconductor  
##  promises         1.0.1    2018-04-13 CRAN (R 3.5.0)
##  purrr            0.2.5    2018-05-29 CRAN (R 3.5.0)
##  R6               2.2.2    2017-06-17 CRAN (R 3.5.0)
##  rapportools    * 1.0      2014-01-07 CRAN (R 3.5.0)
##  RColorBrewer   * 1.1-2    2014-12-07 CRAN (R 3.5.0)
##  Rcpp             0.12.18  2018-07-23 CRAN (R 3.5.0)
##  reshape        * 0.8.7    2017-08-06 CRAN (R 3.5.0)
##  rlang            0.2.2    2018-08-16 CRAN (R 3.5.0)
##  rmarkdown        1.10     2018-06-11 CRAN (R 3.5.0)
##  rprojroot        1.3-2    2018-01-03 CRAN (R 3.5.0)
##  scales           1.0.0    2018-08-09 CRAN (R 3.5.0)
##  scatterplot3d  * 0.3-41   2018-03-14 CRAN (R 3.5.0)
##  shiny            1.1.0    2018-05-17 CRAN (R 3.5.0)
##  stats          * 3.5.0    2018-04-24 local         
##  stringi          1.2.4    2018-07-20 CRAN (R 3.5.0)
##  stringr          1.3.1    2018-05-10 CRAN (R 3.5.0)
##  tibble           1.4.2    2018-01-22 CRAN (R 3.5.0)
##  tidyr            0.8.1    2018-05-18 CRAN (R 3.5.0)
##  tidyselect       0.2.4    2018-02-26 CRAN (R 3.5.0)
##  tools            3.5.0    2018-04-24 local         
##  utils          * 3.5.0    2018-04-24 local         
##  viridisLite      0.3.0    2018-02-01 CRAN (R 3.5.0)
##  withr            2.1.2    2018-03-15 CRAN (R 3.5.0)
##  xtable           1.8-3    2018-08-29 CRAN (R 3.5.0)
##  yaml             2.2.0    2018-07-25 CRAN (R 3.5.0)